#!/usr/bin/env octave

% Copyright (C) 2019 Kei Kebreau
%
% This program is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program.  If not, see <http://www.gnu.org/licenses/>.

clear -g;
global PROJECTION_DISTANCE = 2.2;
global HYDROXL_ATOM = 'O';
% It's probably not good practice to compare floating-point values like this.
global TOLERANCE = 1e-6;

% Arguments: 1x3 location of atom and the planes it may be on.
%
% Returns: Indices of the planes where the atom is located.
function indices = which_planes (location, planes)
  global TOLERANCE;
  indices = [];
  for plane = 1:size(planes)(1)
    if abs(distancePointPlane(location, planes(plane, :))) < TOLERANCE
      indices(end + 1) = plane;
    end
  end
end

% Arguments: Location of atom, the normals to planes, and the index of the plane
% to project from.
%
% Returns: New location projected from the given plane.
function new_location = project(location, plane_normals, plane_index)
  global PROJECTION_DISTANCE;
  new_location = zeros(1, 3);
  new_location(1) = location(1) + sign(plane_normals(plane_index, 1)) * PROJECTION_DISTANCE / sqrt(3);
  new_location(2) = location(2) + sign(plane_normals(plane_index, 2)) * PROJECTION_DISTANCE / sqrt(3);
  new_location(3) = location(3) + sign(plane_normals(plane_index, 3)) * PROJECTION_DISTANCE / sqrt(3);
end

% Let user select input file.
[input_file, file_path] = uigetfile({'*.xyz*', 'Supported file formats'});
% Load the input file.
raw_data = fileread([file_path input_file]);
[~, file_name, ~] = fileparts(input_file);

cd_regexp = ['Cd' repmat('\s+([+-]?[0-9]*\.?[0-9]+)', [1,3])];
cd_locations = regexp(raw_data, cd_regexp, 'tokens');
% Convert cd_locations from a nested cell array of strings to rows of XYZ coordinates.
cd_locations = reshape(cellfun(@str2num, cell2mat(cd_locations)), 3, [])';
num_atoms = str2num(regexp(raw_data, '^(\d+)', 'tokens', 'once'){1});
max_hydroxl = num_atoms - length(cd_locations);

new_atom_count = 0;
new_atoms = '';

% Find vertices and faces of the minimum convex hull and their planes.
faces = convhull(cd_locations);
vertices = cd_locations(unique(faces), :);
planes = zeros(length(faces), 9);
plane_normals = zeros(length(faces), 3);
for plane = 1:size(planes)(1)
  planes(plane, :) = createPlane(vertices([1 2 3 4]([1 2 3 4] ~= plane), :));
  plane_normals(plane, :) = planeNormal(planes(plane, :));
end

% Orient the normals of these planes away from the center of the tetrahedron.
plane_normals(1, :) = plane_normals(1, :) * -1;
plane_normals(3, :) = plane_normals(3, :) * -1;

for row = 1:length(cd_locations)
  cd_atom = cd_locations(row, :);
  cd_faces = which_planes(cd_atom, planes);
  
  % After the coordinates, indices representing the planes that the new atom is
  % on will be placed for further processing by the hydroxl distance calculation
  % script! This should not interfere with proper processing of the resulting
  % .xyz files.
  for cd_face = 1:length(cd_faces)
    new_atom_location = project(cd_atom, plane_normals, cd_faces(cd_face));
    new_atoms{new_atom_count + 1} = sprintf('N %s',
                                            sprintf('%f ', new_atom_location));
    new_atom_count = new_atom_count + 1;
  end
end

output_dir = uigetdir('.', 'Select output directory for output xyz files');
% Let the user input how many output files they want.
num_clusters = str2num(inputdlg('How many clusters to generate?', 'Cluster count'){1});

% Determine how many leading zeros are necessary for correctly listing output clusters.
leading_zeros = floor(log10(num_clusters)) + 1;
original_new_atoms = new_atoms;

xyz_format_str = ['%s%s%s_random_%0' num2str(leading_zeros) 'd.xyz'];
loading_bar = waitbar(0, 'Generating randomly ligated clusters...');

for cluster = 1:num_clusters
  new_atoms = original_new_atoms;
  rand_mutations = randperm(new_atom_count)(1:max_hydroxl);
  for r = rand_mutations
    new_atoms{r} = strrep(new_atoms{r}, 'N', HYDROXL_ATOM);
  end
  xyz_name = sprintf(xyz_format_str, output_dir, filesep, file_name, cluster);
  xyz_id = fopen(xyz_name, 'w');
  fdisp(xyz_id, sprintf('%d\n', num_atoms + new_atom_count));
  fdisp(xyz_id, regexp(raw_data, '(Cd.*)\n', 'tokens', 'once'){1});
  fdisp(xyz_id, strjoin(new_atoms, '\n'));
  fclose(xyz_id);
  waitbar(cluster / num_clusters, loading_bar);
end

close(loading_bar);
clear -g;
